{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook was used to generate the golden data results for ECG. It requires that the python-igraph package be installed to run. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.io import mmread\n",
    "import networkx as nx\n",
    "#mmFile='/datasets/kron_g500-logn21/kron_g500-logn21.mtx'\n",
    "mmFile='/datasets/golden_data/graphs/dblp.mtx'\n",
    "#mmFile='/datasets/networks/karate.mtx'\n",
    "#mmFile='/home/jwyles/code/mycugraph/datasets/dolphins.mtx'\n",
    "#mmFile='/home/jwyles/code/mycugraph/datasets/netscience.mtx'\n",
    "M = mmread(mmFile).asfptype()\n",
    "import cugraph\n",
    "import cudf\n",
    "import numpy as np\n",
    "rows = cudf.Series(M.row)\n",
    "cols = cudf.Series(M.col)\n",
    "values = cudf.Series(M.data)\n",
    "G = cugraph.Graph()\n",
    "G.add_edge_list(rows, cols, values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 326 ms, sys: 400 ms, total: 726 ms\n",
      "Wall time: 796 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "parts = cugraph.ecg(G, .05, 16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49204"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numParts = parts['partition'].max() + 1\n",
    "numParts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.850147008895874"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod = cugraph.analyzeClustering_modularity(G, numParts, parts['partition'])\n",
    "mod"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7506256512679915"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parts2, mod2 = cugraph.louvain(G)\n",
    "mod2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "import igraph as ig\n",
    "import numpy as np\n",
    "\n",
    "def community_ecg(self, weights=None, ens_size=16, min_weight=0.05):\n",
    "    W = [0]*self.ecount()\n",
    "    ## Ensemble of level-1 Louvain \n",
    "    for i in range(ens_size):\n",
    "        p = np.random.permutation(self.vcount()).tolist()\n",
    "        g = self.permute_vertices(p)\n",
    "        l = g.community_multilevel(weights=weights, return_levels=True)[0].membership\n",
    "        b = [l[p[x.tuple[0]]]==l[p[x.tuple[1]]] for x in self.es]\n",
    "        W = [W[i]+b[i] for i in range(len(W))]\n",
    "    W = [min_weight + (1-min_weight)*W[i]/ens_size for i in range(len(W))]\n",
    "    part = self.community_multilevel(weights=W)\n",
    "    ## Force min_weight outside 2-core\n",
    "    core = self.shell_index()\n",
    "    ecore = [min(core[x.tuple[0]],core[x.tuple[1]]) for x in self.es]\n",
    "    part.W = [W[i] if ecore[i]>1 else min_weight for i in range(len(ecore))]\n",
    "    return part\n",
    "\n",
    "ig.Graph.community_ecg = community_ecg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "Gi = ig.Graph.Read_Edgelist('./dblp2.txt', directed=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3min 49s, sys: 1.67 s, total: 3min 51s\n",
      "Wall time: 3min 50s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "ec = Gi.community_ecg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "ecg = np.zeros(len(Gi.vs), dtype=np.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, ..., 0, 0, 0], dtype=int32)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ecg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(len(ec)):\n",
    "    for j in ec[i]:\n",
    "        ecg[j] = i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([275, 275,   0, ..., 435, 435, 107], dtype=int32)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ecg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "ecg_col = cudf.Series(ecg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "numParts = ecg_col.max() + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "mod4 = cugraph.analyzeClustering_modularity(G, numParts, ecg_col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9279554486274719"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "maxId = 0\n",
    "for i in range(len(ec)):\n",
    "    for j in ec[i]:\n",
    "        if j > maxId:\n",
    "            maxId = j\n",
    "maxId"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "156"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(Gi.es)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "156"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(M.row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "156"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "78 *2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = \"dblp2.txt\"\n",
    "f = open(filename, 'w')\n",
    "for i in range(len(M.row)):\n",
    "    f.write(str(M.row[i]) + ' ' + str(M.col[i]) + '\\n')\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "igraph.Edge(<igraph.Graph object at 0x7fb7f439cd68>, 1, {})"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Gi.es[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "igraph.Edge(<igraph.Graph object at 0x7fb7f439cd68>, 1, {})"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Gi.es.select()[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Gi.es[0].source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Gi.es[0].target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
